{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 基尼系数\n",
    "![基尼系数图示](images/基尼系数图示.png)\n",
    "+ 基尼系数越高，数据整体不确定性越强\n",
    "+ 基尼系数越低，数据整体确定性越强\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "\n",
    "iris = datasets.load_iris()\n",
    "X = iris.data[:,2:]\n",
    "y = iris.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=2,\n",
       "                       max_features=None, max_leaf_nodes=None,\n",
       "                       min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "                       min_samples_leaf=1, min_samples_split=2,\n",
       "                       min_weight_fraction_leaf=0.0, presort=False,\n",
       "                       random_state=42, splitter='best')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "\n",
    "dt_clf = DecisionTreeClassifier(max_depth=2, criterion=\"gini\", random_state=42) # 信息熵时criterion=\"entropy\"\n",
    "dt_clf.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_decision_boundary(model, axis):\n",
    "    \n",
    "    x0, x1 = np.meshgrid(\n",
    "        np.linspace(axis[0], axis[1], int((axis[1]-axis[0])*200)).reshape(-1, 1),\n",
    "        np.linspace(axis[2], axis[3], int((axis[3]-axis[2])*200)).reshape(-1, 1),\n",
    "    )\n",
    "    X_new = np.c_[x0.ravel(), x1.ravel()]\n",
    "\n",
    "    y_predict = model.predict(X_new)\n",
    "    zz = y_predict.reshape(x0.shape)\n",
    "\n",
    "    from matplotlib.colors import ListedColormap\n",
    "    custom_cmap = ListedColormap(['#EF9A9A','#FFF59D','#90CAF9'])\n",
    "    \n",
    "    plt.contourf(x0, x1, zz, cmap=custom_cmap)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAcIElEQVR4nO3dfZAc9X3n8fd3H5DQsmLBkpBAMrILLjbmghQ2MrauXFTscIAo43LMnXw+n+2ioquc8UGSqlSwrng6O04qVfapjM8uBbhA4oMYhH1KEDF2mYQHFw8rWcSAuLIOm2gjoQX0tHqE3f3eH9O7O9vbs9PT0zP9MJ9X1RbTv/l1zxdR+6XV/Zlfm7sjIiLF15V1ASIikg41dBGRklBDFxEpCTV0EZGSUEMXESkJNXQRkZKo29DNbL6ZPWdmL5jZS2Z2e8SceWb2N2a228yeNbOVrShWRERqi3OGfgr4LXe/BFgFXGlml4XmXA8cdPcLgG8Af5ZumSIiUk/dhu4VR4PN3uAn/G2ka4F7g9cPAR81M0utShERqasnziQz6wa2AxcA33L3Z0NTzgP2ALj7mJkdBt4FvBk6zgZgA0DfvHmXXrh0aXPVS0PeOv1dWZcgIk3a88qON919cdR7sRq6u48Dq8xsAPi+mV3s7i9WTYk6G5+1poC7bwY2A6xeudIf37gxzsdLSu5d9bmsSxCRJt20Zt5rtd5rKOXi7oeAfwCuDL01DKwAMLMe4EzgQENViohIU+KkXBYHZ+aY2enAx4BXQtO2ApOnf58CfuJa9UtEpK3iXHJZBtwbXEfvAr7n7n9nZncAQ+6+Fbgb+Csz203lzHx9yyoWEZFIdRu6u/8TsDpi/Jaq1yeB69ItTUREGqFvioqIlIQauohISaihi4iUhBq6iEhJqKGLiJSEGrqISEmooYuIlIQauohISaihi4iUhBq6iEhJqKGLiJSEGrqISEmooYuIlIQauohISaihi4iUhBq6iEhJqKGLiJSEGrqISEmooYuIlIQauohISaihi4iUhBq6iEhJqKGLiJSEGrqISEmooYuIlIQauohISaihi4iURN2GbmYrzOxxM9tlZi+Z2Y0Rcy43s8NmtjP4uaU15YqISC09MeaMAX/o7jvMrB/YbmY/cveXQ/OedPdr0i9RRIpk96FHGRq5k6Nj+zmj5xwGl9zABQNXJdoPSHSsTlW3obv7PmBf8HrUzHYB5wHhhi4iHW73oUd5ct9XGPeTABwde50n930FYM5GHLXfP+69DTNjwt9p6FidrKFr6Ga2ElgNPBvx9ofM7AUze9TMPpBCbSJSMEMjd0415UnjfpKhkTsb3s8Zm2rmjRyrk8W55AKAmZ0BbAFucvcjobd3AOe7+1Ezuxr4AXBhxDE2ABsAlp99duKiRSSfjo7tb2g87vtJ53aaWGfoZtZLpZl/190fDr/v7kfc/WjwehvQa2aLIuZtdvdBdx9c1N/fZOkikjdn9JzT0Hjc95PO7TRxUi4G3A3scvev15izNJiHma0JjvtWmoWKSP4NLrmBbps/Y6zb5k/d4GxkP6OHLutt+FidLM4ll7XAZ4Gfm9nOYOzLwLsB3P07wKeA3zOzMeAEsN7dvQX1ikiOTd6sbDSZUmu/JMfqZHFSLk8BVmfOnYDuVIjkTNIIYVxP7/0arxx6GGcCo4v3DXyStefenOgzLhi4KnI/NfD4Yt8UFZFiSRohjOvpvV9j16GHpradianttefe3PTxpXH66r9ISSWNEMb1yqFZ+Yg5x6X11NBFSipphDAuZ6KhcWk9NXSRkkoaIYzLarSPWuPSevqTFymppBHCuN438MmGxqX1dFNUpKSSRgjjmrzxGZVykWyooYt0mLirGkaNhf9nsPbcm2c18DhRybhxylbHLpPKa11q6CIlFXcFwyf23Y6744zNOS/pqonh/eLGKVsdu0wqr3WBrqGLlFbcFQwn/J2pZj7XvKSrJob3ixunbHXsMqm81gVq6CKl1YpVCZOumlg9HjdO2erYZVJ5rQvU0EVKqxWrEiZdNbF6PG6cstWxy6TyWheooYuUVtwVDLusFwvdTku60mGcqGTcOGWrY5dJ5bUu0E1RkdJqZAXDuGNJV02s3i9unLLVscuk8loXgGW1yu3qlSv98Y0bM/nsTnXvqs9lXYKUTNHjh1HyXutNa+Ztd/fBqPd0hi4iiRQ9fhilSLVG0TV0EUmk6PHDKEWqNYoauogkUvT4YZQi1RpFDV1EEil6/DBKkWqNooYuIokUPX4YpUi1RtFNURFJpOjxwyhFqjWKGrqIJFbrwc5J5+VBkWoNU0MXkUjhPPaKM/4Ne44+lcoXkvKS9c5LHWlRQxeRWaLy2LsOPTT1fiNL8SZdPrfV8lJHmnRTVERmicpjh8Vdijfp8rmtlpc60qSGLiKzpJ27TrJ8bqvlpY40qaGLyCxp566TLJ/banmpI01q6CIyS1QeOyzuUrxJl89ttbzUkSbdFBWRWaLy2GmlXPKS9c5LHWmqu3yuma0A7gOWAhPAZnffFJpjwCbgauA48Hl33zHXcbV8bvtp+dxyaWWssOiaiSPmPcrY7PK5Y8AfuvsOM+sHtpvZj9z95ao5VwEXBj8fBL4d/FNEWiBOrDAqQhgVNSx6VC+smThi0aOMda+hu/u+ybNtdx8FdgHnhaZdC9znFc8AA2a2LPVqRQSIFyuMihBGRQ2LHtULayaOWPQoY0M3Rc1sJbAaeDb01nnAnqrtYWY3fcxsg5kNmdnQm6OjjVUqIlNaGSssumbiiEWPMsZu6GZ2BrAFuMndj4Tfjthl1sV5d9/s7oPuPriov7+xSkVkSitjhUXXTByx6FHGWA3dzHqpNPPvuvvDEVOGgRVV28uBvc2XJyJR4sQKoyKEUVHDokf1wpqJIxY9ylj3pmiQYLkb2OXuX68xbStwg5k9QOVm6GF335demSJSrZWxwqJrJo5Y9ChjnJTLWuCzwM/NbGcw9mXg3QDu/h1gG5XI4m4qscUvpF+qiFQLL/O6+9Cj7Dn61Iw5+4/v5NjYCOAcGxth//GdrD335tyufhiWtK5mlsAt9fK57v4U0dfIq+c48MW0ihKRxkTF7f5x760441NznImpaOPac2+ec988RPXyWlee6av/IiUQFberbubVXjk08zZYXqN6ea0rz9TQRUqgkVidMxFr36yjenmtK8/U0EVKoJFYnYV+7fMa1ctrXXmmhi5SAlFxO6M7cu77Bj5Zd988RPXyWleeabVFkRKoFbfbf3wnrxx6GGcCo4v3DXxyxg3RufbN+sZjXuvKMzV0kQJ48Re/y3Nvb2eCyl+r15x2KYf73jurWa//V4/M2ncym97Xs4RzFqyKPH6cqN7Te7826/POWbAqVsONEz+sNSdOA89r7LLd1NBFcu7FX/wuz7y9HaySHp6AyvY70ytUR0US04z9Pb33azNWc5z8vF2HtjC5yket48epo5NXSEyTrqGL5NxzVc18Sng7UB1JTDP2F446Tpu5ZFPU8ePU0ckrJKZJDV0k5ybqT5lSHUlMM/YXjjrOJXz8OHV08gqJaVJDF8m5Rn5JqyOJacb+wlHHuYSPH6eOTl4hMU1q6CI5t+a0SyH8qMgaj46sjiSmGfsLRx2nzbz0E3X8OHV08gqJadJNUZGcu/jCv4CYKZfqSGKasb/J4yZJucSpo5NXSExT3YdEt4oeEt1+eki0SPE1+5BoKYkbP/DXWZfQ8R55bRebXnya14+PsnRBPzdevJZ1578/nYO/PARPbIMjB2HhWfCRq+GiyN97KbCb5nhPDV2kTR55bRe3bf8xJ8crD27ed3yU27b/GKD5pv7yEPz992AseAD0kYOVbVBT7yC6KSrSJptefHqqmU86OT7Gphefbv7gT2ybbuaTxt6pjEvHUEMXaZPXj482NN6QIwcbG5dSUkMXaZOlC/obGm/IwrMaG5dSUkMXaZMbL17L/O6Zt63md/dw48Vrmz/4R66Gnt6ZYz29lXHpGLopKtImkzc+W5JymbzxqZRLR1NDF2mjdceOsW7P3umm+55jsycljR9eNDh7Xk6ijC2Na8oUNXSRdokTLUwzfpiTKGNL45oyg66hi7RLnGhhmvHDnEQZWxrXlBnU0EXaJU60MM34YU6ijC2Na8oMaugi7RInWphm/DAnUcaWxjVlBjV0kXaJEy1MM36YkyhjS+OaMoNuioq0S5xoYZrxw5xEGVsa15QZ6jZ0M7sHuAYYcfeLI96/HPg/wC+DoYfd/Y40ixTJlWaigI//LRw7Unl95CA8tmX2seJ67EF44RnwCbAuHnn/v2aTjc1omvT1sWnFubx+vL8y1tfHusb/jZu27vz3q4G3QZwz9L8E7gTum2POk+5+TSoVieRZM1HAb9063cwnvX2y8jN5rEfuhy6D8fG5j//Yg7Dzp1ObjyyYz23H3uJkV+Uq6r7jo/y353+ImfHOxMTUmOKC5Vb3Grq7PwEcaEMtIvnXTBQw3Myj+MR0M5/r+C88M2Nz01kDU818ajf3qWY+SXHBckvrpuiHzOwFM3vUzD5Qa5KZbTCzITMbenNUkSUpoKyigOHj+8xG/XpPd+xDKS5YXmk09B3A+e5+CfBN4Ae1Jrr7ZncfdPfBRf2KLEkBZRUFDB/fZv7qLh0LndXPQXHB8mq6obv7EXc/GrzeBvSa2aKmKxPJo2aigH0L68+xLugOnW1HHf+Sy2Zs3njwEPNDl1d6zOgNXYZRXLDcmm7oZrbUzCx4vSY45lvNHlckly4ahCv/3fQZ88KzKttxUi5fvH12Uz9t/sxjrfs0XLW+/vGvuA5WfXjqTH3d8ZPc1vculi3ox4BlC/r5ym/+W/774BUzxm679GO6IVpicWKL9wOXA4vMbBi4FegFcPfvAJ8Cfs/MxoATwHp395ZVLJKWNFc1jBKKFXLJZZWmHlVDteFfwujhyuvRw5VtmF3rFddVfgLrgp+wVjZwraKYL5ZV7129cqU/vnFjJp/dqQY+E//GWemF44dQubQR92y7nlCscMqqD0834agarGvWDc/I8TRrTSi8iiJULunobwGtZX3Xb3f3yP/w+uq/dKZWr0QYihVGjkfVENXMo8Zz8ABoraKYP2ro0plaHT+M05ib/ayMHwCtVRTzRw1dOlOr44dW41ererzZz8r4AdBaRTF/1NClM7V6JcJQrDByPKqGOP8jgFw8AFqrKOaPVluUztTqlQgnb3yGUy5VqZSaNQz/cvZ+y9+T+aqJYVpFMX+UcukgSrmIFN9cKRedoUuxpf1U+wf+J/zzL6a3+xbC8aMzz5Zh9hl01FjUWTXMHHvv++HVXbk685bi0hl6ByndGXraWfJwM2+WGVT/fnV3w4TXTsBALvLlkm/KoUs5pZ0lT7OZw8xmDpVlcedq5pCLfLkUlxq6FFdOnmqfuqLXL5lRQ5fiyslT7VNX9PolM2roUlxpZ8nffWHzNVWrLEI6rbu7ds58Ug7y5VJcauhSXM0sZRtl/X+Z3dT7Fk43YeuqLK5VtWztnGPr/sPM2q5aX1ket3ps1YfTq186nmKLUmxxl7KNEhV5/PU1cOjNuWOELw9NRw37z6zEEy8anPmlocl5SepNO4opHUMNXTpTOPJ45CA8cj902fRDmo8crMyB6YYatV94TiPz4tQVZz8RdMlFOlWtpWvHQ8/mDMcI40Ylk0YqW72sr5SaGrp0pkaigdVz40Ylk0YqyxrFlLZQQ5fO1Eg0sHpu3Khk0khlWaOY0hZq6NKZai1d2x1aHiEcI4wblUwaqWz1sr5SaropKp2p1tK1UWPVNyPjLrubdHneVi/rK6Wmhi7pymvkLqquKHFihXGjkkkjlc1EMaWjqaFLevIauYuq69EHZq58mJdaRZqga+iSnrxG7qLqilr5MA+1ijRBDV3Sk9fIXdKIokjBqKFLevIauUsaURQpGDV0SU9eI3dRdUWtfJiHWkWaoJuikp68Ru6SRhRFCqZuQzeze4BrgBF3vzjifQM2AVcDx4HPu/uOtAuVgkgauXvswdkPWQ6vXhh3Xq3oZFRdUSsp6iHOUlBxLrn8JXDlHO9fBVwY/GwAvt18WdJRHnsQdv50OnXiE5Xtxx5sfN5kRHHy5uZkHDFqKduwqH13/jTZsUQyULehu/sTwIE5plwL3OcVzwADZrYsrQKlA7zwTLzxOPOaiU5G7RumaKPkWBo3Rc8D9lRtDwdjs5jZBjMbMrOhN0dHU/hoKYVwHrzWeJx5zUQn40YWFW2UnEqjoVvEmEdNdPfN7j7o7oOL+vtT+GgphVrP2QyPx5nXTHQybmRR0UbJqTQa+jCwomp7ObA3heNKp7jksnjjceY1E52M2jdM0UbJsTQa+lbgP1nFZcBhd9+XwnGlU1xxXfRDlsPplTjzmnlwdNS+eoizFIi5R14dmZ5gdj9wObAI2A/cCvQCuPt3gtjinVSSMMeBL7h73RjA6pUr/fGNG5sqXhoz8Jnu+pNEJNes7/rt7h55VlE3h+7un67zvgNfTFibiIikRF/9FxEpCTV0EZGSUEMXESkJNXQRkZJQQxcRKQk1dBGRklBDFxEpCTV0EZGSUEMXESkJNXQRkZJQQxcRKQk1dBGRklBDFxEpCTV0EZGSUEMXESkJNXQRkZJQQxcRKQk1dBGRklBDFxEpCTV0EZGSqPuQaKlv2+jpfPNAP6+PdbO0Z5wvnT3K1f0nsi5LRDqMGnqTto2ezh1vnMlJr/xlZ99YD3e8cSaAmrqItJUuuTTpmwf6p5r5pJPexTcP9GdUkYh0KjX0Jr0+1t3QuIhIq6ihN2lpz3hD4yIiraKG3qQvnT3KfJuYMTbfJvjS2aMZVSQinUo3RZs0eeNTKRcRyVqshm5mVwKbgG7gLnf/09D7nwf+HPiXYOhOd78rxTpz7er+E2rgIpK5ug3dzLqBbwG/DQwDz5vZVnd/OTT1b9z9hhbUWEjKpotIu8W5hr4G2O3ur7r728ADwLWtLavYJrPp+8Z6cGwqm75t9PSsSxOREovT0M8D9lRtDwdjYb9jZv9kZg+Z2YpUqisoZdNFJAtxGrpFjHlo+2+Ble7+68CPgXsjD2S2wcyGzGzozdHypkCUTReRLMRp6MNA9Rn3cmBv9QR3f8vdTwWbfwFcGnUgd9/s7oPuPriov7xnq8qmi0gW4jT054ELzew9ZnYasB7YWj3BzJZVbX4c2JVeicWjbLqIZKFuysXdx8zsBuCHVGKL97j7S2Z2BzDk7luB/2pmHwfGgAPA51tYc+4pmy4iWYiVQ3f3bcC20NgtVa9vBm5Ot7T2ihsz3DB8Ns+dmje1vWbeKT5x5olZ+6b5mSIiceibosRfAne6mU/fJ37u1DyeH5mHB2P7xnq4dWQAxxmj9vG07K6IpE1ruRA/Zhhu5hU21cwnvYNNNfNax1O0UUTSpoZO+2KG1cdTtFFE0qaGTvtihtXHU7RRRNKmhk78mOGaeaeY/Z0qx0JjvTg9zH08RRtFJG1q6FRuQt6y+DDLesYwnGU9Y9yy+PCsm5Oblx+oauqVnzXzTvHVJYdm7Hv7kkPcsWTu48X9TBGRuJRyCfzsRC/7x7pxYP9YNz870csPDp8eGVHcc6BnKmr4iTNPRC6fG2chLi27KyJpUkMHvjqykAdH+5hMsExAsA31IopRUUNFEkUkC7rkAmypaubTLHIsHFGMihoqkigiWVBDh9Dty8aFo4aKJIpIFtTQaf4PIRw1VCRRRLKghg78Tv8xouKIcSKKUVFDRRJFJAtq6MDGJUe4rv8YXUET78K5rv9YrIhiVNRQkUQRyULpUi5xVzD86shCtoz2MUHl/2p9TExdS58Ath8/jdfGZ/7xbD91Gi+MnMapqZRLN7ePnMk33ujnDZ++Pr7Yxvn9xfXPxrXaooikqVQNPW5cMCqmOEoX1amWV8d7g1fTY+PBT/XYKQia+fTYG97Nl0cGpsa02qKItEOpLrnEjQvWjimGt9Mb02qLItJqpWroceOCzcYUk9JqiyLSSqVq6HHjgln9S2u1RRFppVI19LhxwdoxxfB2emNabVFEWq1UDT1uXDAqptjPBNURxfd2v0N31TY43TjzQmPzcBbb+IyxxTbOn9SJNyraKCJpM/fw2WV7rF650h/fuDGTz+5UA5/R9XmRorO+67e7+2DUe6WKLdaSNO8dzqpXLtUwa2zjkiOtLF9EJJbSN/Skee84S+pWj6mpi0jWSnUNPUrSvHcjS+pumWr0IiLZKX1DT5r3biSrnlWuXUSkWukbetK8dyN/MKX/QxSRQih9L0qa925kSd3Jm6UiIlkq/U3RyRufjaZcJm9yKuUiIkURq6Gb2ZXAJqAbuMvd/zT0/jzgPuBS4C3g37v7r9ItNbmr+08k+sLOxiVHIpu1GriI5FHdSy5m1g18C7gKuAj4tJldFJp2PXDQ3S8AvgH8WdqFiojI3OJcQ18D7Hb3V939beAB4NrQnGuBe4PXDwEfNbNwvk9ERFooziWX84A9VdvDwAdrzXH3MTM7DLwLeLN6kpltADYEm0fP2rDh/yYpOqZF4c8vmPTr31B/Sor055+dItcOqr+e82u9EaehR51ph6Mecebg7puBzTE+s2lmNlRrvYMiUP3ZKnL9Ra4dVH8z4lxyGQZWVG0vB/bWmmNmPcCZwIE0ChQRkXjiNPTngQvN7D1mdhqwHtgamrMV+Fzw+lPATzyrZRxFRDpU3UsuwTXxG4AfUokt3uPuL5nZHcCQu28F7gb+ysx2UzkzX9/KomNqy6WdFlL92Spy/UWuHVR/Ypmthy4iIukq/Vf/RUQ6hRq6iEhJlK6hm9k9ZjZiZi9mXUsSZrbCzB43s11m9pKZ3Zh1TXGZ2Xwze87MXghqvz3rmpIws24z+5mZ/V3WtTTKzH5lZj83s51mNpR1PY0yswEze8jMXgl+Bz6UdU1xmdmvBX/ukz9HzOymttZQtmvoZvYR4Chwn7tfnHU9jTKzZcAyd99hZv3AduAT7v5yxqXVFXw7uM/dj5pZL/AUcKO7P5NxaQ0xsz8ABoGF7n5N1vU0wsx+BQy6eyG/mGNm9wJPuvtdQapugbsfyrquRgVLpvwL8EF3f61dn1u6M3R3f4ICZ+DdfZ+77whejwK7qHwTN/e84miw2Rv8FOqMwcyWA+uAu7KupdOY2ULgI1RSc7j720Vs5oGPAv+vnc0cStjQy8TMVgKrgWezrSS+4HLFTmAE+JG7F6b2wP8A/ojiPojKgcfMbHuw1EaRvBd4A/hfwSWvu8ysqM93XA/c3+4PVUPPKTM7A9gC3OTuhVmv193H3X0VlW8UrzGzwlz2MrNrgBF33551LU1Y6+6/QWV11C8GlyCLogf4DeDb7r4aOAb8cbYlNS64VPRx4MF2f7Yaeg4F15+3AN9194ezrieJ4K/K/wBcmXEpjVgLfDy4Dv0A8Ftm9tfZltQYd98b/HME+D6V1VKLYhgYrvpb3UNUGnzRXAXscPf97f5gNfScCW4s3g3scvevZ11PI8xssZkNBK9PBz4GvJJtVfG5+83uvtzdV1L5K/NP3P0/ZlxWbGbWF9xIJ7hUcQVQmLSXu78O7DGzXwuGPgrkPgwQ4dNkcLkFSvgIOjO7H7gcWGRmw8Ct7n53tlU1ZC3wWeDnwbVogC+7+7YMa4prGXBvcIe/C/ieuxcu+ldg5wDfDx5F0AP8b3f/+2xLatiXgO8Gly1eBb6QcT0NMbMFwG8D/zmTzy9bbFFEpFPpkouISEmooYuIlIQauohISaihi4iUhBq6iEhJqKGLiJSEGrqISEn8fwoRWMTYJE5nAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plot_decision_boundary(dt_clf, axis=[0.5, 7.5, 0, 3])\n",
    "plt.scatter(X[y==0,0], X[y==0,1])\n",
    "plt.scatter(X[y==1,0], X[y==1,1])\n",
    "plt.scatter(X[y==2,0], X[y==2,1])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 模拟使用基尼系数划分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "from math import log\n",
    "\n",
    "def split(X, y, d, value):\n",
    "    index_a = (X[:,d] <= value)\n",
    "    index_b = (X[:,d] > value)\n",
    "    return X[index_a], X[index_b], y[index_a], y[index_b]\n",
    "\n",
    "def gini(y):\n",
    "    counter = Counter(y)\n",
    "    res = 1.0\n",
    "    for num in counter.values():\n",
    "        p = num / len(y)\n",
    "        res -= p**2\n",
    "    return res\n",
    "\n",
    "def try_split(X, y):\n",
    "    \n",
    "    best_g = float('inf')\n",
    "    best_d, best_v = -1, -1\n",
    "    for d in range(X.shape[1]):\n",
    "        sorted_index = np.argsort(X[:,d])\n",
    "        for i in range(1, len(X)):\n",
    "            if X[sorted_index[i], d] != X[sorted_index[i-1], d]:\n",
    "                v = (X[sorted_index[i], d] + X[sorted_index[i-1], d])/2\n",
    "                X_l, X_r, y_l, y_r = split(X, y, d, v)\n",
    "                p_l, p_r = len(X_l) / len(X), len(X_r) / len(X)\n",
    "                g = p_l * gini(y_l) + p_r * gini(y_r)\n",
    "                if g < best_g:\n",
    "                    best_g, best_d, best_v = g, d, v\n",
    "                \n",
    "    return best_g, best_d, best_v"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best_g = 0.3333333333333333\n",
      "best_d = 0\n",
      "best_v = 2.45\n"
     ]
    }
   ],
   "source": [
    "best_g, best_d, best_v = try_split(X, y)\n",
    "print(\"best_g =\", best_g)\n",
    "print(\"best_d =\", best_d)\n",
    "print(\"best_v =\", best_v)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "X1_l, X1_r, y1_l, y1_r = split(X, y, best_d, best_v)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gini(y1_l)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gini(y1_r)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best_g = 0.1103059581320451\n",
      "best_d = 1\n",
      "best_v = 1.75\n"
     ]
    }
   ],
   "source": [
    "best_g2, best_d2, best_v2 = try_split(X1_r, y1_r)\n",
    "print(\"best_g =\", best_g2)\n",
    "print(\"best_d =\", best_d2)\n",
    "print(\"best_v =\", best_v2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "X2_l, X2_r, y2_l, y2_r = split(X1_r, y1_r, best_d2, best_v2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.1680384087791495"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gini(y2_l)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.04253308128544431"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gini(y2_r)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 信息熵VS基尼系数"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "+ 熵信息的计算比基尼系数稍慢，因为信息熵是log运算，基尼系数是平方运算\n",
    "+ scikit-learn中默认为基尼系数\n",
    "+ 大多数时候二者没有特别的效果优劣"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
